********************************************************************************
** Inflation Expectations Comparison, Nondurables
********************************************************************************
* --- Reset the session: data, matrices, stored estimates, graphs --------------
clear
clear matrix
estimates clear
graph drop _all

* --- Session settings ---------------------------------------------------------
set more off
set scheme s1color
set matsize 2500

* Close every open log before continuing
log close _all

* Font used in the graph window
graph set window fontface "Times New Roman"

** Working directory (relative to wherever Stata is launched from)
cd "../Do"
** Haver directory (left disabled)
// set haverdir "Haver", perm

********************************************************************************
** HAVER: PULL AND PREPARE MONTHLY SERIES
********************************************************************************
* Series are requested for 2008m9-2013m10; only 2009m10-2012m11 is kept at the
* end of this section, so the 12-month lags and leads below are built first.
* Inflation is computed on monthly data (12-month change). A quarterly variant
* would switch tvar to yearq and use 4-quarter changes instead.
import haver (pcuslfe pztexp jcxfebm pzrgus cinf1)@usecon , tvar(yearmo) clear fin(2008m9, 2013m10)

* Strip the _usecon suffix / assign short names (pztexp is deflated by pce below)
rename (pcuslfe_usecon pztexp_usecon jcxfebm_usecon pzrgus_usecon cinf1_usecon) ///
       (cpiu           pztexp        pce            reggas        mich)

tsset yearmo

* 12-month percent change in cpiu; the denominator is the level 12 months
* earlier (L12.cpiu), not the current level.
generate inflation_yearly = ((cpiu - L12.cpiu) / L12.cpiu)*100
label variable inflation_yearly "Actual Inflation"

* Two forward-looking versions: the 12-month lead of the series above, and the
* percent change from the current month to 12 months ahead.
generate inflation_yoy_forward  = F12.inflation_yearly
generate inflation_yoy_forward2 = ((F12.cpiu-cpiu)/cpiu)*100

* WTI = West Texas Intermediate crude oil price, deflated by pce
generate wti = pztexp/pce
label variable wti "WTI, Deflated by PCE"

* reggas rescaled by 100 (presumably the regular gasoline price -- TODO confirm)
generate reggas2 = reggas/100

* Calendar year and quarter derived from the monthly date
generate year  = year(dofm(yearmo))
generate yearq = qofd(dofm(yearmo))
format yearq %tq

* Restrict to 2009m10 through 2012m11 and stash the result for the figure
keep if inrange(yearmo, tm(2009m10), tm(2012m11))
tempfile haverd
save `haverd'

********************************************************************************
** LOAD AND CLEAN NONDURABLES DATA
********************************************************************************
use "../Data/matched_data_nondurables_jun2018_baseline.dta", clear

* Spending categories that are summed into nondurables. Sports is the only
* category left out, because it includes durable goods.
local spendcats ///
    electricity water heatingfuel phonecable housecleaningproducts housecleaningservice ///
    gardenproducts gardenservice clothing personalcare drugs healthcareservices medsupplies entertainment ///
    hobbies personalservices otherchildspending foodhome foodout gasoline

* Row-wise sum of the categories above
egen nondurables = rowtotal(`spendcats')

* Note: nondurables has one outlier above 29000 (monthly spending on nondurable
* goods/services). Extreme values are removed further down before the sample is
* defined, and a small number of zeroes are dropped as well.

drop ethnicity

* For these four indicators the code 5 is mapped to 0
recode mort stocks retacct howner (5=0)

* Recode expectations variables from the inflation surveys.
* In each pair below the third listed code is folded into 0; any other value
* (e.g. respondents answering "other") is left as-is and so stays a separate
* category.

* Unemployment: unemp_increase flags q6==1, unemp_decrease flags q6==3
recode q6 (1=1) (2 3 = 0), gen(unemp_increase)
recode q6 (3=1) (1 2 = 0), gen(unemp_decrease)

* Conditions over 12 months (q2a): better flags 1, worse flags 2
generate conditions_12m = q2a
recode q2a (1=1) (2 3 = 0), gen(conditions_12m_better)
recode q2a (2=1) (1 3 = 0), gen(conditions_12m_worse)

* Interest rates over 12 months (q7): up flags 1, down flags 3
generate interestrate_12m = q7
recode interestrate_12m (1=1) (2 3 = 0), gen(intrate_12m_up)
recode interestrate_12m (3=1) (1 2 = 0), gen(intrate_12m_down)

* Business conditions over 12 months (q4): better flags 1, worse flags 2
generate bconditions_12m = q4
recode q4 (1=1) (2 3 = 0), gen(bconditions_12m_better)
recode q4 (2=1) (1 3 = 0), gen(bconditions_12m_worse)

* House price point forecast: q41 gives the direction (1 -> +q42, 2 -> -q42,
* 3 -> 0); any other q41 value leaves the forecast missing.
generate hppoint = cond(q41==1, q42, cond(q41==2, -q42, cond(q41==3, 0, .)))
* Some responses are extreme (tens of thousands); blank out |value| >= 200
replace hppoint = . if abs(hppoint)>=200
label variable hppoint "House price expectation"

* Deflation: build a real nondurables series alongside the nominal one so that
* regressions can be run on either.
* Per the original notes (Arman), PCUN is the nondurables CPI and was redefined
* in the haver do file so that values are in 2012q1 dollars.

* The Haver monthly file is keyed on date_monthly, so rename temporarily
rename spend_month date_monthly
sort date_monthly
merge m:1 date_monthly using "../Data/haver_m.dta"

* Inspect the match, then discard months that exist only in the Haver file
tab _merge
keep if _merge!=2
drop _merge

* Simple deflation of the main nondurables measure (MB)
generate nondurables_real = nondurables/PCUN
rename date_monthly spend_month
* Note: real values are on average only slightly higher than nominal values

*** Prepare some additional descriptives
* Race isn't reported in all periods: take each respondent's mean of race over
* their rows and merge that single value back onto every observation.
preserve
collapse (mean) race , by(prim_key)
sort prim_key
tempfile race
save `race'
restore

drop race
sort prim_key
merge m:1 prim_key using `race'
tab _merge
drop _merge
drop if prim_key==""

* white = 1 when race==1 and 0 for any other nonmissing value
recode race (1=1) (nonmissing = 0), gen(white)
gen nonwhite = 1-white
* female = 1 when gender==2, 0 when gender==1
recode gender (2=1) (1=0), gen(female)
* College indicator: education codes up to 9 -> 0, codes 10-16 -> 1.
* FIX: the rule used to read (4 9 = 0), which is a LIST of the two values 4 and
* 9 rather than a range, so every other code below 10 kept its raw value and
* coll was not a 0/1 dummy. A range needs the slash syntax.
* Assumes highesteducation is coded 1-16 -- TODO confirm against the codebook.
recode highesteducation (1/9 = 0) (10/16 = 1), gen(coll)

* The q31s* variables hold the employment-status codes, one variable per code
* (q31sK equals K when status K was selected).
* MB: an employed flag looks odd since everyone in the sample is employed, but
* some respondents also report being retired.
generate employed = 1 if q31s1==1
replace employed = 0 if q31s1!=1 & (q31s2==2 | q31s3==3 | q31s4==4 | q31s5==5 | q31s6==6 | q31s7==7)


* Currently-retired flag, rebuilt from q31s5 (the existing variable is discarded).
* Some respondents are retired even though all are employed; they may hold a
* small job, which would explain why they report wage growth expectations.
drop retired
generate retired = 1 if q31s5==5
replace retired = 0 if q31s5!=5 & (q31s1==1 | q31s2==2 | q31s3==3 | q31s4==4 | q31s6==6 | q31s7==7)
tab retired

* Further preparation for regressions: variable lists and specification labels
local expectations intrate_12m_up intrate_12m_down unemp_increase unemp_decrease rw_expect d_wageiqr hppoint
local infl1 d_inflmedian d_infliqr
local infl2 d_longinflmedian d_longinfliqr

* MB: when using lags, the lagged variables or these locals may need renaming
* so that they match:
*local lags1 "lagE_d_inflmedian lagE_d_infliqr" ;
*local lags2 "lagE_d_longinflmedian lagE_d_longinfliqr" ;

local spec1 "Short-Run Infl. Exp."
local spec2 "Medium-Run Infl. Exp."
label variable d_inflmedian "Inflation Expectations"
label variable d_infliqr "Inflation Uncertainty"

* Disabled alternative:
*replace howner=0 if howner==.
* It adds ten people to homeowner status; there are still some with
* howner_fix==0 who have a positive mortgage payment but missing data for
* has-mortgage and/or mortgage amount.
* Two observations have howner==0 and mort==1 (not a homeowner, yet report a
* mortgage); not important, so they are not recoded for now.

* The mortgage dummy is non-missing for over 2000 observations, whereas the
* mortgage balance (amtmort) is observed for only 990 people.
*tab mort
* Fill in a missing mortgage dummy from other information:
*   0 when not recorded as a homeowner, or when the mortgage payment is zero;
*   1 for the remaining cases that are homeowners or have a payment above zero.
replace mort = 0 if mort==. & (howner!=1 | mortgage==0)
replace mort = 1 if mort==. & (howner==1 | (mortgage>0 & mortgage!=.))
label variable mort "Mortgage Indicator"

* MB: quasi-continuous income built from the midpoints of the annual household
* income brackets (familyincome and familyincome_part2).
rename familyincome inc2
rename familyincome_part2 inc2_2
drop if inc2==.

* new_faminc: bracket k of inc2 (k = 1..13) is assigned the k-th midpoint below
generate new_faminc = .
local midpoints 2500 6250 8750 11250 13750 17500 22500 27500 32500 37500 45000 55000 67500
forvalues k = 1/13 {
    local mid : word `k' of `midpoints'
    replace new_faminc = `mid' if inc2==`k'
}

* Bracket 14 is split further by inc2_2. One person has inc2==14 with inc2_2
* missing (reason unknown); they are assigned the lowest category of income
* over $75000.
replace new_faminc = 87500 if inc2==14 & (inc2_2==1 | inc2_2==.)
local topmids 112500 162500 237500
forvalues j = 2/4 {
    local mid : word `=`j'-1' of `topmids'
    replace new_faminc = `mid' if inc2==14 & inc2_2==`j'
}

generate log_new_faminc = ln(new_faminc)


sort prim_key spend_month
* Bring the SAMPLE WEIGHTS into the monthly data. Warning: observations may be
* lost if the set requiring weights has changed; this limits regressions on
* non-employed types (those can only be run unweighted).
* These are the regression weights, stored as weight_samp (designed for the
* regression sample); the full-sample weights remain weight_full.
* Only matched rows are kept (original note: 195 observations did not merge
* with a weight_samp and were dropped).
merge m:1 prim_key using ../Data/mweights_pooled, keep(match) nogenerate

* Define the regression sample before recentering any variables: remove people
* with extreme spending or inflation expectations, or a mortgage payment that
* looks like a mortgage balance.
* Outlier identified by Ali, available as an alternative. MB: this prim_key
* does not look suspicious -- the drop may have been tied to the previous (bad)
* income variable.
*drop if prim_key=="5041140:1"

* Mortgage payment above 200000 (missing values are kept)
drop if mortgage>200000 & mortgage!=.
* Inflation expectation above 35 (original note: drops 17 observations)
drop if d_inflmedian>35
* Zero nondurables (original note: 13 observations) and nondurables above
* 28000 (original note: 1 observation)
drop if nondurables==0 | nondurables>28000

*assigning locals for weights (can turn on or off in regression)
local weights "[pweight=weight_samp]"
local weights_full "[pweight=weight_full]"
local pwfile "_pw"

*variable labels used in regression output
la var d_inflmedian "Inflation Expectation"
la var d_infliqr "Inflation Uncertainty"
la var d_longinflmedian "Inflation Expectation"
la var d_longinfliqr "Inflation Uncertainty"
la var lag_IE "Lagged Inflation Expectation"
la var lag_infl_iqr "Lagged Infl. Uncertainty"

*variable for sum of monthly payments--interactions between this variable and IE may be included in some models
*homeowners (howner==1): mortgage + car; everyone else: rent + car
*payments is missing whenever one of its components is missing
gen payments=mortgage+car if howner==1
replace payments=rent+car if howner!=1
*log of payments, with zero payments mapped to 0; stays missing when payments is missing
gen log_payments=.
replace log_payments=log(payments) if payments>0
replace log_payments=0 if payments==0
la var log_payments "Monthly Payments (Log)"
*drop one observation with extreme value for payments (110,000): only if running a regression interacting with payments
*FIX: Stata treats missing as larger than any number, so the unguarded
*"drop if payments>100000" also removed every observation with missing payments.
*The guard below restricts the drop to the genuine outlier, matching the
*guarded mortgage drop ("& mortgage!=.") used when defining the sample.
*NOTE(review): this keeps observations with missing payments that used to be
*dropped silently; re-check sample counts against earlier output.
drop if payments>100000 & !missing(payments)

* Define the sample: remove every observation with a missing value in any of
* the variables listed below.
*   - lag_IE / lag_infl_iqr are only needed when lagged IE enters the regression.
*   - new_faminc is the recode of the categorical familyincome variables; the
*     former income variable (earnings last month) was highly unreliable.
*   - hppoint and howner come last. Alternative: leave those two out of the
*     list and omit howner and hppoint from the regressions. Per the original
*     notes, requiring hppoint costs 300+ observations and requiring howner
*     costs 111.
foreach v of varlist weight_samp d_inflmedian d_infliqr nondurables ///
        lag_IE lag_infl_iqr new_faminc ///
        intrate_12m_up intrate_12m_down unemp_increase unemp_decrease ///
        rw_expect d_wageiqr rage nonwhite female coll retired mort ///
        hppoint howner {
    drop if `v'==.
}

* Numeric person id: prim_key with the ":" characters removed
* NOTE(review): stripping ":" could map two different keys to the same number
* if the part before the colon varies in length -- confirm keys are unique.
destring prim_key, generate(id_new) ignore(":")

* Observations per person: sum a constant 1 within id_new
generate pre_obs = 1
egen obs = total(pre_obs), by(id_new)
summarize obs, detail
* Baseline sample: people with at least 3 observations
generate nondurables_sample_base = (obs>=3)

* Same count and flag again for the house-price (hp) sample. At this point it
* is computed on the same rows, so it coincides with the baseline flag.
generate pre_obs2 = 1
egen obs2 = total(pre_obs2), by(id_new)
summarize obs2, detail
generate nondurables_sample_hp = (obs2>=3)

* Monthly date of the expectations survey, built from exp_date
generate year2  = year(exp_date)
generate month2 = month(exp_date)
generate yearmo = ym(year2, month2)
format yearmo %tm

* Collapse to one row per expectations month: the weighted median across
* individuals of their (median) inflation expectation, using the hp sample
* only. The result is stored directly under its final name, d_inflmedian_ndur.
collapse (p50) d_inflmedian_ndur = d_inflmedian if nondurables_sample_hp==1 [pweight=weight_samp], by(yearmo)

tsset yearmo
* MB: the lagged IE series is no longer created
*gen L_lag_IE = l12.d_inflmedian //Lagged Median inflation expectations
*ren L_lag_IE L_lag_IE_ndur

* Keep the monthly series for the figure
tempfile nondur
save `nondur'

********************************************************************************
** CREATE FIGURE
********************************************************************************

* Combine the survey medians with the Haver series, month by month
use `nondur', clear
merge 1:1 yearmo using `haverd', gen(havermatch)
sort yearmo
* edited MB October 2019
* edited MB Jan 2021

** Graphs
* Figure A1. Inflation Expectations Comparison -- nondurables spending sample
* Blue: median expectation from the nondurables sample; red: mich series.
* A title and a note are intentionally not drawn. The disabled wording was:
*   title: Figure A1. RAND-ALP Inflation Expectations vs. Michigan Inflation Expectations
*   note:  Median inflation expectation refers to the median one-year-ahead
*          inflation expectation for the given month, based on the dates on
*          which subjects completed the expectations surveys.
twoway ///
    (connected d_inflmedian_ndur yearmo, mcolor(blue) msize(small) lcolor(blue) yaxis(1)) ///
    (connected mich yearmo, mcolor(red) msize(small) lcolor(red) yaxis(1)), ///
    ylabel(, angle(horizontal) labsize(small) axis(1)) ///
    ytitle("Percent", axis(1) size(small)) ///
    tlabel(2009m10 "2009m10" 2010m10 "2010m10" 2011m10 "2011m10" 2012m10 "2012m10", angle(horizontal) labsize(small)) ///
    xtitle("") ///
    legend(region(lstyle(none)) rows(3) colfirst symx(*.4) size(small) ///
        order(1 "RAND{stMono:-}ALP Median Inflation Expectation (Nondurables Sample)" ///
              2 "Michigan Survey Median Inflation Expectation")) ///
    name(g1, replace)

graph export ../Figures/figureA1.png, as(png) replace

* Black-and-white version of the same figure: switch to the monochrome scheme
* and drop the explicit marker/line colours. The graph is stored under the same
* name (g1), replacing the colour version in memory.
* As in the colour version, no title or note is drawn.

set scheme s1mono

twoway ///
    (connected d_inflmedian_ndur yearmo, msize(small) yaxis(1)) ///
    (connected mich yearmo, msize(small) yaxis(1)), ///
    ylabel(, angle(horizontal) labsize(small) axis(1)) ///
    ytitle("Percent", axis(1) size(small)) ///
    tlabel(2009m10 "2009m10" 2010m10 "2010m10" 2011m10 "2011m10" 2012m10 "2012m10", angle(horizontal) labsize(small)) ///
    xtitle("") ///
    legend(region(lstyle(none)) rows(3) colfirst symx(*.4) size(small) ///
        order(1 "RAND{stMono:-}ALP Median Inflation Expectation (Nondurables Sample)" ///
              2 "Michigan Survey Median Inflation Expectation")) ///
    name(g1, replace)

graph export ../Figures/figureA1_bw.png, as(png) replace


